Sys.setenv(LANG = "en") #English
knitr::opts_chunk$set(echo = TRUE)rm(list = ls())
path <- getwd()
setwd(path)
# packages
pacman::p_load(tidyverse, plotly,readxl,scales, extrafont,PerformanceAnalytics, GGally, patchwork, ggpubr, ggrepel, stargazer, kableExtra,modelsummary)
# Font for windows and mac
if (stringr::str_detect(path, pattern="Users")){
theme_set(theme_classic(base_size = 10, base_family = "HiraginoSans-W3")) # For Mac OS
} else{
theme_set(theme_classic(base_size = 10, base_family = "Arial")) # For Windows
} df_analysis <- readr::read_csv("output/df_analysis.csv")## Rows: 1551 Columns: 149
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (4): prefec_kanji, prefecture, prefec, prefec_kanji2
## dbl (144): id, month, year, suicide_total, suicide_male, suicide_female, su...
## date (1): date
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_analysis_desc_stat <- df_analysis %>%
select(date, prefec_kanji, # date and prefecture
unemploy_shock_diff2, # main treatment
job_seeker_total_shock, # alternative treatment
suicide_rate, # suicide
suicide_rate_female,
suicide_rate_male,
unemp_benefit_number_total, # unemployment benefit
unemp_benefit_number_female,
unemp_benefit_number_male,
yoy_unemp_benefit_number_total, # unemployment benefit(YOY) 2021Nov11 added
yoy_unemp_benefit_number_female, #2021Nov11 added
yoy_unemp_benefit_number_male, #2021Nov11 added
koguchi_number, # Emergency small-amount fund / 緊急小口資金
sogo_number, # General support fund / 総合支援資金
jukyo_number, # Housing Security Benefits / 住居確保給付金
persons_receive, # Public Assistance recipients / 生活保護受給者数
households_receive, # Public Assistance recipient households / 生活保護受給世帯数
yoy_persons_receive, #2021Nov11 追加
yoy_households_receive, #2021Nov11 追加
infection_rate_cumulative2020jun, # COVID-19 cumulative infection rate up to June 2020
death_rate_cumulative2020jun, # COVID-19 cumulative death rate up to June 2020
google_mobility_index_2020may, # Google Mobility index at the end of May 2020
Population_per_1_km_2_of_inhabitable_area, # Population density, measured based on inhabitable area
Secondary_industry_ratio, # secondary industry ratio
Tertiary_industry_ratio, # tertiary (service) industry ratio
Ratio_of_aged_population, # ratio of aged (65+) to working age (1? - 64)
Total_population) # total population# 2021Sep30
# Keep only one month data for cross-sectional treatment and control variables / 処置変数とコントロール変数は1月分だけ残す(目視チェック)
df_analysis_desc_stat <- df_analysis_desc_stat %>%
dplyr::mutate(unemploy_shock_diff2 = case_when(date == "2019-01-01" ~ unemploy_shock_diff2)) %>%
dplyr::mutate(job_seeker_total_shock = case_when(date == "2019-01-01" ~ job_seeker_total_shock)) %>%
dplyr::mutate(google_mobility_index_2020may = case_when(date == "2019-01-01" ~ google_mobility_index_2020may)) %>%
dplyr::mutate(infection_rate_cumulative2020jun = case_when(date == "2019-01-01" ~ infection_rate_cumulative2020jun)) %>%
dplyr::mutate(death_rate_cumulative2020jun = case_when(date == "2019-01-01" ~ death_rate_cumulative2020jun)) %>%
dplyr::mutate(Population_per_1_km_2_of_inhabitable_area = case_when(date == "2019-01-01" ~ Population_per_1_km_2_of_inhabitable_area)) %>%
dplyr::mutate(Secondary_industry_ratio = case_when(date == "2019-01-01" ~ Secondary_industry_ratio)) %>%
dplyr::mutate(Tertiary_industry_ratio = case_when(date == "2019-01-01" ~ Tertiary_industry_ratio)) %>%
dplyr::mutate(Ratio_of_aged_population = case_when(date == "2019-01-01" ~ Ratio_of_aged_population)) %>%
dplyr::mutate(Total_population = case_when(date == "2019-01-01" ~ Total_population))# summary stat: skimr
#skimr::skim(df_analysis_desc_stat)
# summary stat: text
# 処置変数とコントロール変数の記述統計量(html表示用)
stargazer::stargazer(data.frame(df_analysis_desc_stat),
type = "text",digits = 3,
summary.stat = c("n", "mean","sd","median","min", "max"),
covariate.labels = c("Employment shock (baseline)",
"Employment shock (full-time)",
"Suicide rate (total)",
"Suicide rate (female)",
"Suicide rate (male)",
"Unemployment benefit (total)",
"Unemployment benefit (female)",
"Unemployment benefit (male)",
"Unemployment bene. (YOY, total)",
"Unemployment benefit (YOY, female)",
"Unemployment benefit (YOY, male)",
"Emergency S.A. Funds",
"General Support Funds",
"Housing Security Benefit",
"Public assistance (recipients)",
"Public assistance (households)",
"Public assistance (YOY, recipients)",
"Public assistance (YOY, households)",
"Cumulative infection rate",
"Cumulative death rate",
"Google Mobility index",
"Population density",
"Ratio of employees (secondary)",
"Ratio of employees (service)",
"Elderly dependency rate",
"Total population")) ##
## ==========================================================================================
## Statistic N Mean St. Dev. Median Min Max
## ------------------------------------------------------------------------------------------
## Employment shock (baseline) 47 0.247 0.418 0.162 -0.767 1.227
## Employment shock (full-time) 47 0.080 0.082 0.109 -0.139 0.216
## Suicide rate (total) 1,551 1.356 0.376 1.322 0.177 3.266
## Suicide rate (female) 1,551 0.781 0.342 0.758 0.000 2.165
## Suicide rate (male) 1,551 1.971 0.626 1.896 0.000 4.686
## Unemployment benefit (total) 1,551 326.385 61.237 319.203 202.109 526.635
## Unemployment benefit (female) 1,551 375.744 73.201 368.638 225.504 607.705
## Unemployment benefit (male) 1,551 273.122 51.882 264.760 176.699 454.545
## Unemployment bene. (YOY, total) 1,551 12.251 38.010 3.680 -137.246 191.956
## Unemployment benefit (YOY, female) 1,551 9.552 39.779 1.292 -175.598 206.215
## Unemployment benefit (YOY, male) 1,551 15.196 38.510 5.725 -94.493 187.694
## Emergency S.A. Funds 893 28.505 55.847 0.697 0.000 567.447
## General Support Funds 893 14.903 36.930 0.034 0.000 426.290
## Housing Security Benefit 423 6.130 9.494 2.782 0.000 81.259
## Public assistance (recipients) 1,551 1,398.269 666.371 1,303.267 332.386 3,248.442
## Public assistance (households) 1,551 1,103.915 514.651 993.016 289.867 2,516.684
## Public assistance (YOY, recipients) 1,551 -10.959 18.902 -9.456 -61.430 46.458
## Public assistance (YOY, households) 1,551 3.012 11.851 2.311 -21.452 50.504
## Cumulative infection rate 47 8.512 8.556 6.084 0.000 44.717
## Cumulative death rate 47 0.433 0.633 0.123 0.000 2.373
## Google Mobility index 47 -22.180 4.864 -22.016 -39.048 -12.871
## Population density 47 1,350.738 1,781.228 795.600 234.700 9,792.900
## Ratio of employees (secondary) 47 0.245 0.049 0.235 0.138 0.331
## Ratio of employees (service) 47 0.660 0.039 0.665 0.601 0.735
## Elderly dependency rate 47 53.502 7.508 53.900 35.000 70.100
## Total population 47 268.489 277.929 160.000 56.000 1,392.000
## ------------------------------------------------------------------------------------------
# 2021Oct7
# Tretment, Outcome, Covariate
# modelsummary with kableExtra
# kableExtra::pack_rows
# kableExtra::add_footnote cannot be used (unsolved)
table_desc_stat <- modelsummary::datasummary(
(`Employment shock (baseline)` = unemploy_shock_diff2) +
(`Employment shock (full-time)` = job_seeker_total_shock) +
(`Suicide rate (total)` = suicide_rate) +
(`Suicide rate (female)` = suicide_rate_female) +
(`Suicide rate (male)` = suicide_rate_male) +
(`Unemployment benefit (total)` = unemp_benefit_number_total) +
(`Unemployment benefit (female)` = unemp_benefit_number_female) +
(`Unemployment benefit (male)` = unemp_benefit_number_male) +
(`Unemployment benefit (total, yoy)` = yoy_unemp_benefit_number_total) +
(`Unemployment benefit (female, yoy)` = yoy_unemp_benefit_number_female) +
(`Unemployment benefit (male, yoy)` = yoy_unemp_benefit_number_male) +
(`Emergency Small Ammount Funds` = koguchi_number) +
(`General Support Funds` = sogo_number) +
(`Housing Security Benefit` = jukyo_number) +
(`Public assistance (recipients)` = persons_receive) +
(`Public assistance (households)` = households_receive) +
(`Public assistance (recipients, yoy)` = yoy_persons_receive) +
(`Public assistance (households, yoy)` = yoy_households_receive) +
(`Cumulative infection rate` = infection_rate_cumulative2020jun) +
(`Cumulative death rate` = death_rate_cumulative2020jun) +
(`Google Mobility index` = google_mobility_index_2020may) +
(`Population density` = Population_per_1_km_2_of_inhabitable_area) +
(`Ratio of employees (secondary)` = Secondary_industry_ratio) +
(`Ratio of employees (service)` = Tertiary_industry_ratio) +
(`Elderly dependency rate` = Ratio_of_aged_population) +
(`Total population` = Total_population) ~
N + Mean + (Std.Dev. = SD) + Min + Max,
label = "tab:summary_stat",
title = "Summary Statistics",
data = df_analysis_desc_stat,
output = 'latex') %>%
kableExtra::pack_rows("Treatment",1,2) %>%
kableExtra::pack_rows("Outcome",3,18) %>%
kableExtra::pack_rows("Covariate",19,26) %>%
kableExtra::add_footnote(c("\\parbox[t]{\\textwidth}{Notes: The employment shock is a cross-section variable calculated based on equation \\eqref{eq:employment_shock}. All the outcome variables are calculated per 100,000 population. For the definition of each variable, see Table \\ref{tab:data_source} in Appendix \\ref{sec:background_info}. Outcome variables such as suicide rates, unemployment benefits, and Public assitance recipients are 21-months data (from January 2019 to September 2020) while the variables of Emergency Small Amount Funds, General Support Funds, and Housing Security Benefit are more restricted due to data limitation. ``yoy'' means year-on-year difference. \\\\
Sources: See Table \\ref{tab:data_source} in Appendix \\ref{sec:background_info}.}"),threeparttable = TRUE, notation = "none",escape = FALSE)## Warning: To compile a LaTeX document with this table, the following commands must be placed in the document preamble:
##
## \usepackage{booktabs}
## \usepackage{siunitx}
## \newcolumntype{d}{S[input-symbols = ()]}
##
## To disable `siunitx` and prevent `modelsummary` from wrapping numeric entries in `\num{}`, call:
##
## options("modelsummary_format_numeric_latex" = "plain")
##
## This warning is displayed once per session.
cat(table_desc_stat, sep = '\n', file = "output/table_summary_stat.tex")# 1月分のデータ抽出
df_employ_shock <- df_analysis %>% filter(date == "2019-01-01")g_unemploy_shock <- ggplot2::ggplot(df_employ_shock, aes(x = unemploy_shock_diff2, y = reorder(prefec, unemploy_shock_diff2))) +
geom_bar(stat = "identity") +
theme(legend.position = 'none') +
theme_light() +
labs(title = "(a) Employment shock", x = "Employment shock measured by the unemployment rate",y = "")
ggplotly(g_unemploy_shock)#Extract data / データ抽出
df_unemploy_shock_diff2 <- df_employ_shock %>%
select(prefec_kanji, unemploy_shock_diff2)
# save data as csv for check
write_excel_csv(df_unemploy_shock_diff2, "output/unemploy_shock_diff2.csv")scatter_treat_outcome = function(dataset, treat_var, outcome_var,
point_size_var, main_title, x_title, y_title){
dataset %>%
ggplot(aes(x = treat_var, y = outcome_var, size = point_size_var, label = prefec)) +
geom_point(color = "black", shape = 21, fill = "blue1", alpha = 0.5) +
geom_smooth(method = "lm", se = FALSE, color = "gray20", linetype = "dashed") +
labs(title = main_title, x = x_title, y = y_title) +
theme_classic() +
scale_size(range = c(2,12)) +
theme(legend.position = 'none') +
geom_vline(xintercept= 0, colour = "gray") +
geom_hline(yintercept= 0, colour = "gray")
}df_correlation <- df_analysis %>%
dplyr::filter(date == "2020-01-01") %>%
dplyr::select(unemploy_shock_diff2, job_seeker_total_shock, population_total, prefec)graph_unemploy_job_seeker_plot <- scatter_treat_outcome(dataset = df_correlation,
treat_var = df_correlation$unemploy_shock_diff2,
outcome_var = df_correlation$job_seeker_total_shock,
point_size_var = df_correlation$population_total,
main_title = "",
x_title = "Baseline employment shock",
y_title = "''Full-time'' employment shock") +
geom_text_repel(size=3)
ggplotly(graph_unemploy_job_seeker_plot)## `geom_smooth()` using formula 'y ~ x'
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
## If you'd like to see this geom implemented,
## Please open an issue with your example code at
## https://github.com/ropensci/plotly/issues
# save as PDF
ggsave(file = "output/graph_unemploy_job_seeker_plot.pdf", plot = graph_unemploy_job_seeker_plot,
dpi = 100, width = 6, height = 5) ## `geom_smooth()` using formula 'y ~ x'
reg1 <- estimatr::lm_robust(job_seeker_total_shock ~ unemploy_shock_diff2,
data = df_correlation, se_type = "stata")
texreg::screenreg(list(reg1), include.ci = FALSE, digits = 3)##
## ================================
## Model 1
## --------------------------------
## (Intercept) 0.071 ***
## (0.016)
## unemploy_shock_diff2 0.039
## (0.026)
## --------------------------------
## R^2 0.040
## Adj. R^2 0.019
## Num. obs. 47
## RMSE 0.082
## ================================
## *** p < 0.001; ** p < 0.01; * p < 0.05
# 2020.7月分のデータ抽出 for 自殺率YOY)
df_analysis_2020jul <- df_analysis %>% filter(date == "2020-07-01")# 2021Aug17
# Total / 全体
graph_unemp_suicide_rate_total <- scatter_treat_outcome(dataset = df_analysis_2020jul,
treat_var = df_analysis_2020jul$unemploy_shock_diff2,
outcome_var = df_analysis_2020jul$yoy_suicide_rate,
point_size_var = df_analysis_2020jul$population_total,
main_title = "(b) Total suicides",
x_title = "Employment shock measured by the unemployment rate",
y_title = "Change in the suicide rate") +
scale_y_continuous(limits = c(-1.5, 1.5),
breaks=c(-1.5, -1.0, -0.5, 0.0,
0.5, 1.0, 1.5))
ggplotly(graph_unemp_suicide_rate_total)## `geom_smooth()` using formula 'y ~ x'
# Female / 女性
graph_unemp_suicide_rate_female <- scatter_treat_outcome(dataset = df_analysis_2020jul,
treat_var = df_analysis_2020jul$unemploy_shock_diff2,
outcome_var = df_analysis_2020jul$yoy_suicide_rate_female,
point_size_var = df_analysis_2020jul$population_total,
main_title = "(c) Female suicides",
x_title = "Employment shock measured by the unemployment rate",
y_title = "Change in the suicide rate")+
scale_y_continuous(limits = c(-1.5, 1.5),
breaks=c(-1.5, -1.0, -0.5, 0.0,
0.5, 1.0, 1.5))
ggplotly(graph_unemp_suicide_rate_female)## `geom_smooth()` using formula 'y ~ x'
# Male / 男性
graph_unemp_suicide_rate_male <- scatter_treat_outcome(dataset = df_analysis_2020jul,
treat_var = df_analysis_2020jul$unemploy_shock_diff2,
outcome_var = df_analysis_2020jul$yoy_suicide_rate_male,
point_size_var = df_analysis_2020jul$population_total,
main_title = "(d) Male suicides",
x_title = "Employment shock measured by the unemployment rate",
y_title = "Change in the suicide rate") +
scale_y_continuous(limits = c(-1.5, 1.5),
breaks=c(-1.5, -1.0, -0.5, 0.0,
0.5, 1.0, 1.5))
ggplotly(graph_unemp_suicide_rate_male)## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
# Bind and save scatter plots / 散布図の統合と保存
## bind and save graphs
graph_unemp_suicide_bind <- (g_unemploy_shock + graph_unemp_suicide_rate_total) / (graph_unemp_suicide_rate_female + graph_unemp_suicide_rate_male)
graph_unemp_suicide_bind ## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
ggsave(file = "output/graph_unemp_suicide_YOY_Jul.pdf", plot = graph_unemp_suicide_bind,
dpi = 100, width = 12, height = 12) ## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).